from  urllib import request,error
import  re

# Browser-like User-Agent sent with every board request.
_REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'}

# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10

# One <dd>...</dd> board entry. '.' does not match a newline here, so every
# '.*?\n' consumes exactly one line of markup; the pattern is deliberately
# line-oriented and follows the page layout line by line.
_BOARD_ITEM_RE = re.compile(
    r'<dd>\n'
    r'.*?<i class="board-index board-index-.*?">(.*?)</i>\n'   # 1: rank
    r'.*?<a href=".*?" title="(.*?)".*?\n'                     # 2: title
    r'.*?<img src="//.*?".*?\n'                                # placeholder image, skipped
    r'.*?<img data-src="(.*?)".*?\n'                           # 3: real poster URL
    r'(?:.*?\n){5}'                                            # </a>, wrapper divs, name
    r'.*?<p class="star">\n'
    r'(.*?)\n'                                                 # 4: cast line
    r'.*?\n'                                                   # closing </p>
    r'.*?<p class="releasetime">(.*?)</p>.*?\n'                # 5: release line
    r'.*?\n'                                                   # score wrapper div
    r'.*?<p class="score"><i class="integer">(.*?)</i>'        # 6: integer part, e.g. "9."
    r'<i class="fraction">(.*?)</i>'                           # 7: fraction part, e.g. "1"
)


def _fetch_board_page(key):
    """Download one board page and return its body decoded as UTF-8."""
    url = "http://maoyan.com/board/4?offset=" + str(key) + '0'
    print(url)

    req = request.Request(url, headers=_REQUEST_HEADERS)
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req, timeout=_REQUEST_TIMEOUT) as res:
        return res.read().decode("utf-8")


def getInfo(key, html=None, out_path='猫眼top100 电影集合.txt'):
    """Extract the films on one Maoyan Top-100 board page and append them to a file.

    Args:
        key: Page index. The request offset is ``str(key) + '0'``, so integer
            keys 0..9 map to offsets 00, 10, ..., 90.
        html: Optional page markup. When given, nothing is downloaded and this
            text is parsed instead (useful for saved pages and for checking
            the pattern offline).
        out_path: Text file the records are appended to (UTF-8).

    Each matched film is written as rank, title, poster URL, cast line,
    release line and score, followed by a blank line. The score is also
    printed to stdout.

    Raises:
        urllib.error.URLError: if the page has to be downloaded and the
            request fails (a timeout may surface as ``socket.timeout``).
        UnicodeDecodeError: if the downloaded body is not valid UTF-8.
    """
    if html is None:
        html = _fetch_board_page(key)

    records = _BOARD_ITEM_RE.findall(html)

    with open(out_path, 'a+', encoding='utf-8') as f:
        # Write the heading only once: 'a+' keeps earlier pages, so only an
        # empty (new) file still needs it.
        f.seek(0, 2)
        if f.tell() == 0:
            f.write("猫眼top100 电影集合：" + "\n")

        for rank, title, poster, cast, release, integer, fraction in records:
            # The page splits the score into "9." and "1"; join both halves,
            # falling back to "0" when the fraction is empty.
            score = integer + (fraction or '0')

            f.writelines([
                "序号:" + rank + "\n",
                "电影名称:" + title + "\n",
                "图片地址:" + poster + "\n",
                # Captured lines keep the markup's indentation; drop the spaces.
                cast.replace(' ', '') + "\n",
                release.replace(' ', '') + "\n",
                "评分:" + score + "\n",
                "\n",
            ])
            print("评分:" + score)

if __name__ == '__main__':
    # Script entry point: process page indices 0 through 9 in order,
    # one getInfo call per page.
    page_count = 10
    for page_index in range(page_count):
        getInfo(page_index)